This file documents our reanalysis of the dataset used to examine biodiversity and conflict relationships in the Southern Philippines presented in this paper https://doi.org/10.1038/s44185-024-00044-8

Load required packages

#make sure to install all packages including required dependencies prior to use
library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(rgdal)
library(raster)
library(lme4)
library(MASS)
library(piecewiseSEM)

Prepare datasets

#biodiversity data was obtained from here https://doi.org/10.15468/rtedgk
#and conflict data was from here https://data.humdata.org/dataset/ucdp-data-for-philippines?
#note that only a subset of this dataset (i.e., those from Mindanao and adjacent islands) was used

#biodiversity data
bio <- read.csv("mobios_tab.csv", header = TRUE, sep = ",")
#subset data
bio.sub <- subset(bio, select = c(12, 23, 20, 15, 16))
bio.sub <- bio.sub %>% filter(county!="") #remove rows with no county information
bio.sub <- bio.sub %>% filter(class!="Malacostraca") #exclude this taxon
bio.sub <- bio.sub %>% filter(class!="Bivalvia") #exclude this taxon
bio.sub <- bio.sub %>% filter(class!="Gastropoda")#exclude this taxon
#check province name
#there are rows where the name of the province is uncertain as indicated by "/"
#"Zamboanga del Norte" was changed to "Zamboanga del Norte" prior to import of data
unique(bio.sub$county)
##  [1] "Lanao del Norte"                                      
##  [2] "South Cotabato"                                       
##  [3] "North Cotabato"                                       
##  [4] "Camiguin Island"                                      
##  [5] "Maguindanao"                                          
##  [6] "Agusan del Sur"                                       
##  [7] "Bukidnon"                                             
##  [8] "Davao Oriental"                                       
##  [9] "Davao"                                                
## [10] "Misamis Occidental"                                   
## [11] "Surigao del Sur"                                      
## [12] "Davao del sur"                                        
## [13] "Lanao del Sur"                                        
## [14] "Sarangani"                                            
## [15] "Misamis Oriental"                                     
## [16] "Surigao del Norte"                                    
## [17] "Davao de Oro"                                         
## [18] "Davao del Norte"                                      
## [19] "Zamboanga del Sur"                                    
## [20] "Surigao del Norte/Agusan del Sur"                     
## [21] "Dinagat Island"                                       
## [22] "Basilan"                                              
## [23] "Tawi-Tawi"                                            
## [24] "Sulu"                                                 
## [25] "Sultan Kudarat"                                       
## [26] "Agusan del Norte"                                     
## [27] "Cagayan de Oro"                                       
## [28] "Bukidnon/Camiguin"                                    
## [29] "Bukidnon/Misamis Oriental/Iligan"                     
## [30] "Zamboanga del Norte"                                  
## [31] "Zamboanga Sibugay"                                    
## [32] "Misamis Occidental/Misamis Oriental/Camiguin/Bukidnon"
## [33] "Zamboanga City"                                       
## [34] "General Santos"                                       
## [35] "North Cotabato/Davao City"
#remove uncertain province names
bio.sub <- bio.sub %>% filter(!grepl('/', county))

head(bio.sub)
#conflict data
con <- read.csv("conflict_tab_89-21.csv", header = TRUE, sep = ",")
con.sub <- subset(con, select = c(1,3,15,18,30,32,33))

head(con.sub)
write.csv(con.sub, "con.sub.csv")

Count recorded species per taxon in each point and within province

#the methods of how species counts were done in each site was not provided in the paper
#according to the paper analysis was done at the provincial level
#there could be two different ways how the species counts were done (see figure below)
#this part is extremely important which the authors failed to discuss
#the left panel of the figure shows that species counts were aggregated at the provincial level 
#each point (within the province) receives the same value of species count (or species richness)
#the right panel shows that each point has a unique number of species count
#note that these values are crucial for other downstream analyses 
#particularly when relationships of species counts 
#and distance to the nearest conflict sites are considered
#we generated the values for these two scenarios below 
#and test whether any of them would match the results presented in the paper

knitr::include_graphics("bioconflict.png")

#count unique records (i.e., species richness) per point
bio.data.sum <- bio.sub %>%
              group_by(county, class, decimalLatitude, decimalLongitude) %>%
              mutate(NumbSp = n_distinct(scientificName))

head(bio.data.sum)
#export data
write.csv(bio.data.sum, "bio.data.sum.csv")

#count species records per province (this lumps all the data, 
#thus each point will have a single species richness value)
prov.bio.data.sum <- bio.sub %>%
                          group_by(county, class) %>%
                          mutate(NumbSp = n_distinct(scientificName))
head(prov.bio.data.sum)
#export data
write.csv(prov.bio.data.sum, "prov.data.sum.csv")

Calculate the distance of species record to the nearest conflict site/s using QGIS

#in QGIS, import the "bio.data.sum" and "con.sub" from R as delimited text files. 
#export these files as GeoPackage or Shapefile to convert the data to meters prior to calculation
#use the CRS UTM Zone 51N
knitr::include_graphics("qgis_geo.png")

knitr::include_graphics("qgis_utm.png")

#import back the newly saved data to QGIS
#get the distance of nearest conflict areas using the "Joint attributes by nearest" plugin
#QGIS > Processing Toolbox > Join attributes by nearest
knitr::include_graphics("qgis_join.png")

#distance can be calculated as well without joining the attributes using the "Distance to nearest hub"
#after calculating the distance, a new layer will be created. 
#the new layer contains the calculated distance in the attribute table
#this also includes the x and y coordinates of the nearest site/point
#then export this new layer as a csv file so we can use the data in R for other analysis
#repeat the entire process for provincial data (i.e., prov.bio.data.sum)
#figure below shows the nearest conflict point to species record (black line)
#notice the remainder of conflict points are not included
knitr::include_graphics("hub.png")

Calculate the average distance

#species richness per point in each province
#read the file with calculated distance from QGIS
bio.con.dist <- read.csv("bio.con.dist.csv", header = TRUE, sep = ",")
#get average distance by province for each taxon
mean.dist <- bio.con.dist %>%
                group_by(county, class, NumbSp, decimalLatitude, decimalLongitude) %>%
                summarize(meanDistance = mean(distance, na.rm = TRUE))
## `summarise()` has grouped output by 'county', 'class', 'NumbSp',
## 'decimalLatitude'. You can override using the `.groups` argument.
head(mean.dist)
write.csv(mean.dist, "mean.distance.csv")

#species richness lumped per province
#read the file with calculated distance in QGIS
prov.bio.con.dist <- read.csv("prov.bio.con.dist.csv", header = TRUE, sep = ",")
#get average distance by province for each taxon
prov.mean.dist <- prov.bio.con.dist %>%
                group_by(county, class, NumbSp, decimalLatitude, decimalLongitude) %>%
                summarize(meanDistance = mean(distance, na.rm = TRUE))
## `summarise()` has grouped output by 'county', 'class', 'NumbSp',
## 'decimalLatitude'. You can override using the `.groups` argument.
write.csv(prov.mean.dist, "prov.mean.distance.csv")

#get average distance by province considering long/lat data (i.e., lumped average)
prov.lmp.dist <- prov.bio.con.dist %>%
                group_by(county, class, NumbSp) %>%
                summarize(lmpMeanDistance = mean(distance, na.rm = TRUE))
## `summarise()` has grouped output by 'county', 'class'. You can override using
## the `.groups` argument.
write.csv(prov.lmp.dist, "prov.lmp.distance.csv")

Plot species record vs average distance

key.lab <- unique(mean.dist$class)
plot.color <- c("#7b3dcc", "#cc3d3d", "#000000", "#d970cb", "#8c994d", "#7f404a","#67E3FC")
point.shape <-  c(0,1,2,3,4,5,6,7,8,9,10)

#plot data with species richness per point
p1 <- ggplot(mean.dist, (aes(x=meanDistance, y=NumbSp, color=class, shape=class))) + theme_bw() +
            theme(axis.text=element_text(size=15), axis.title=element_text(size=15, face="bold"), 
              legend.key.width = unit(0.8, units = "cm"), legend.text = element_text(size=15), 
              legend.key=element_blank()) +
            ylab("number of species/record") + 
            xlab("average distance (m)") +
            geom_point(size = 3, stroke = 0.6) + 
            geom_smooth(method=lm, se=FALSE, fullrange=FALSE, size = 2) +
            scale_color_manual(name = "", labels = key.lab, values = plot.color) +
            scale_shape_manual(name = "", labels = key.lab, values = point.shape)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#plot data with lumped species richness
p2 <- ggplot(prov.mean.dist, (aes(x=meanDistance, y=NumbSp, color=class, shape=class))) + theme_bw() +
            theme(axis.text=element_text(size=15), axis.title=element_text(size=15, face="bold"), 
              legend.key.width = unit(0.8, units = "cm"), legend.text = element_text(size=15), 
              legend.key=element_blank()) +
            ylab("number of species/record") + 
            xlab("average distance (m)") +
            geom_point(size = 3, stroke = 0.6) + 
            geom_smooth(method=lm, se=FALSE, fullrange=FALSE, size = 2) +
            scale_color_manual(name = "", labels = key.lab, values = plot.color) +
            scale_shape_manual(name = "", labels = key.lab, values = point.shape)

ggarrange(p1,p2, nrow = 1, ncol = 2, common.legend = TRUE, align = "hv",  widths = 20, heights = 5)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

tiff("sp_dist.tif", res=300, width = 7, height = 7, unit="in") 
ggplot(mean.dist, (aes(x=meanDistance, y=NumbSp, color=class, shape=class))) + theme_bw() +
            theme(axis.text=element_text(size=15), axis.title=element_text(size=15, face="bold"), 
              legend.key.width = unit(0.8, units = "cm"), legend.text = element_text(size=15), 
              legend.key=element_blank(), legend.position = c(0.83, 0.85), 
              legend.background=element_blank()) +
            ylab("Number of species record") + 
            xlab("Average distance (m)") +
            geom_point(size = 3, stroke = 0.6) + 
            geom_smooth(method=lm, se=FALSE, fullrange=FALSE, size = 2) +
            scale_color_manual(name = "", labels = key.lab, values = plot.color) +
            scale_shape_manual(name = "", labels = key.lab, values = point.shape)
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2

Get number of conflict sites per province

#we are not sure how the frequency of conflicts were scored in the paper as it was not mentioned
#so two ways can be considered here
#first, we can get frequency based on the number of unique conflict points within each province
#i.e., per point (unique latitude/longitude)
con.sub.2 <- con.sub
con.sub.2 <- con.sub.2 %>% filter(PROVINCE!="") #filter empty rows

con.freq <- con.sub.2 %>%
              group_by(PROVINCE) %>%
              mutate(ConflicFreq = n_distinct(latitude, longitude)) #use only the unique latitude
con.freq <- con.freq %>% distinct(PROVINCE, .keep_all = TRUE)
write.csv(con.freq, "conflict.frequency.unique.csv")

#second, count each row as a unique report of conflict regardless of coordinates
con.freq2 <- con.sub.2 %>%
              group_by(PROVINCE) %>%
              count()
write.csv(con.freq2, "conflict.frequency.rows.csv")

ggplot(data=con.freq2, aes(x=PROVINCE, y=n)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

#combine biodiversity and conflict data (lumped)
prov.comb <- prov.lmp.dist %>% mutate(conflictFreq = if_else(county == "Agusan del Norte", 50,
    if_else(county == "Agusan del Sur", 85,
    if_else(county == "Basilan", 329,
    if_else(county == "Bukidnon", 82,
    if_else(county == "Cagayan de Oro", 0,
    if_else(county == "Camiguin Island", 0,
    if_else(county == "Cotabato", 221,
    if_else(county == "Davao", 0,
    if_else(county == "Davao Oriental", 52,
    if_else(county == "Davao de Oro", 113,
    if_else(county == "Davao del Norte", 56, 
    if_else(county == "Davao del sur", 181,
    if_else(county == "Dinagat Island", 0,
    if_else(county == "General Santos", 0, 
    if_else(county == "Lanao del Norte", 66,
    if_else(county == "Lanao del Sur", 181,
    if_else(county == "Maguindanao", 352, 
    if_else(county == "Misamis Occidental", 15,
    if_else(county == "Misamis Oriental", 38,
    if_else(county == "North Cotabato", 0,
    if_else(county == "Sarangani", 25,
    if_else(county == "South Cotabato", 55,
    if_else(county == "Sultan Kudarat", 62,
    if_else(county == "Sulu", 393,
    if_else(county == "Surigao del Norte", 29,
    if_else(county == "Surigao del Sur", 72,
    if_else(county == "Tawi-Tawi", 17,
    if_else(county == "Zamboanga City", 0,
    if_else(county == "Zamboanga Sibugay", 18,
    if_else(county == "Zamboanga del Norte", 34,
    if_else(county == "Zamboanga del Sur", 106, 0))))))))))))))))))))))))))))))))
write.csv(prov.comb, "prov.lmp.comb.csv")

#combine biodiversity and conflict data (per point)
perpoint.comb <- mean.dist %>% mutate(conflictFreq = if_else(county == "Agusan del Norte", 50,
    if_else(county == "Agusan del Sur", 85,
    if_else(county == "Basilan", 329,
    if_else(county == "Bukidnon", 82,
    if_else(county == "Cagayan de Oro", 0,
    if_else(county == "Camiguin Island", 0,
    if_else(county == "Cotabato", 221,
    if_else(county == "Davao", 0,
    if_else(county == "Davao Oriental", 52,
    if_else(county == "Davao de Oro", 113,
    if_else(county == "Davao del Norte", 56, 
    if_else(county == "Davao del sur", 181,
    if_else(county == "Dinagat Island", 0,
    if_else(county == "General Santos", 0, 
    if_else(county == "Lanao del Norte", 66,
    if_else(county == "Lanao del Sur", 181,
    if_else(county == "Maguindanao", 352, 
    if_else(county == "Misamis Occidental", 15,
    if_else(county == "Misamis Oriental", 38,
    if_else(county == "North Cotabato", 0,
    if_else(county == "Sarangani", 25,
    if_else(county == "South Cotabato", 55,
    if_else(county == "Sultan Kudarat", 62,
    if_else(county == "Sulu", 393,
    if_else(county == "Surigao del Norte", 29,
    if_else(county == "Surigao del Sur", 72,
    if_else(county == "Tawi-Tawi", 17,
    if_else(county == "Zamboanga City", 0,
    if_else(county == "Zamboanga Sibugay", 18,
    if_else(county == "Zamboanga del Norte", 34,
    if_else(county == "Zamboanga del Sur", 106, 0))))))))))))))))))))))))))))))))
write.csv(perpoint.comb, "perpoint.comb.csv")

Perform GLM

#glm using lumped species richness per province
#assign taxa as a factor
prov.comb$class <- factor(prov.comb$class)
#transform data
prov.comb$distlog <- log(prov.comb$lmpMeanDistance + 1)
prov.comb$freqlog <- log(prov.comb$conflictFreq + 1)

#plot data
h1 <- ggplot(prov.comb, aes(x = lmpMeanDistance, y = NumbSp)) + 
        geom_point(pch = 1) + geom_smooth(method = lm)
h2 <- ggplot(prov.comb, aes(x = conflictFreq, y = NumbSp)) + 
        geom_point(pch = 1) + geom_smooth(method = lm)
ggarrange(h1, h2, nrow = 1, ncol = 2, align = "hv")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#perform glm
glm.res.1 <- glm(NumbSp ~ freqlog + distlog + class, 
                 family="poisson"(link="log"), data = prov.comb)
glm.res.2 <- glm(NumbSp ~ freqlog + class, 
                 family="poisson"(link="log"), data = prov.comb)
glm.res.3 <- glm(NumbSp ~ distlog + class, 
                 family="poisson"(link="log"), data = prov.comb)
glm.res.4 <- glm(NumbSp ~ freqlog + distlog, 
                 family="poisson"(link="log"), data = prov.comb)

#compare glm results
AIC(glm.res.1, glm.res.2, glm.res.3, glm.res.4)
stepAIC(glm.res.1, direction='both') #use stepAIC function
## Start:  AIC=3608.83
## NumbSp ~ freqlog + distlog + class
## 
##           Df Deviance    AIC
## <none>         3017.1 3608.8
## - distlog  1   3029.5 3619.2
## - freqlog  1   3061.7 3651.5
## - class    6   5503.7 6083.4
## 
## Call:  glm(formula = NumbSp ~ freqlog + distlog + class, family = poisson(link = "log"), 
##     data = prov.comb)
## 
## Coefficients:
##    (Intercept)         freqlog         distlog   classAmphibia  classArachnida  
##        3.55329        -0.05149        -0.04438         0.37876         0.46041  
##      classAves    classInsecta   classMammalia   classReptilia  
##        1.32025         1.72873        -0.23824         0.78888  
## 
## Degrees of Freedom: 111 Total (i.e. Null);  103 Residual
## Null Deviance:       5683 
## Residual Deviance: 3017  AIC: 3609
#get summary for best model
summary(glm.res.1)
## 
## Call:
## glm(formula = NumbSp ~ freqlog + distlog + class, family = poisson(link = "log"), 
##     data = prov.comb)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     3.553287   0.126761  28.031  < 2e-16 ***
## freqlog        -0.051487   0.007601  -6.773 1.26e-11 ***
## distlog        -0.044379   0.012537  -3.540  0.00040 ***
## classAmphibia   0.378760   0.078980   4.796 1.62e-06 ***
## classArachnida  0.460405   0.076193   6.043 1.52e-09 ***
## classAves       1.320248   0.066389  19.886  < 2e-16 ***
## classInsecta    1.728728   0.063862  27.070  < 2e-16 ***
## classMammalia  -0.238237   0.083012  -2.870  0.00411 ** 
## classReptilia   0.788883   0.072272  10.915  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 5683.2  on 111  degrees of freedom
## Residual deviance: 3017.1  on 103  degrees of freedom
## AIC: 3608.8
## 
## Number of Fisher Scoring iterations: 5
#glm with taxa as a random variable
#pred.scale <- scale(prov.comb[4:5]) #scale predictors
#prov.comb.s <- cbind(prov.comb[2:3], pred.scale)

glm.res.5 <- glmer(NumbSp ~ freqlog + distlog + (1|class), 
                 family = "poisson"(link ="log"), data = prov.comb)
summary(glm.res.5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: NumbSp ~ freqlog + distlog + (1 | class)
##    Data: prov.comb
## 
##      AIC      BIC   logLik deviance df.resid 
##   3644.6   3655.5  -1818.3   3636.6      108 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.9359 -3.1715 -0.6495  2.4200 19.6431 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  class  (Intercept) 0.4209   0.6487  
## Number of obs: 112, groups:  class, 7
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.190055   0.269402  15.553  < 2e-16 ***
## freqlog     -0.051447   0.007599  -6.770 1.28e-11 ***
## distlog     -0.044632   0.012535  -3.561  0.00037 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) freqlg
## freqlog -0.096       
## distlog -0.399  0.010
#get R^2
rsquared(glm.res.5, method="trigamma")
#use data per point
#assign taxa as a factor
perpoint.comb$class <- factor(perpoint.comb$class)
#transform data
perpoint.comb$distlog <- log(perpoint.comb$meanDistance + 1)
perpoint.comb$freqlog <- log(perpoint.comb$conflictFreq + 1)

#plot data
h1 <- ggplot(perpoint.comb, aes(x = meanDistance, y = NumbSp)) + 
        geom_point(pch = 1) + geom_smooth(method = lm)
h2 <- ggplot(perpoint.comb, aes(x = conflictFreq, y = NumbSp)) + 
        geom_point(pch = 1) + geom_smooth(method = lm)
ggarrange(h1, h2, nrow = 1, ncol = 2, align = "hv")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#perform glm
glm.res.1 <- glm(NumbSp ~ freqlog + distlog + class, 
                 family = "poisson"(link ="log"), data = perpoint.comb)
glm.res.2 <- glm(NumbSp ~ freqlog + class, 
                 family = "poisson"(link ="log"), data = perpoint.comb)
glm.res.3 <- glm(NumbSp ~ distlog + class, 
                 family = "poisson"(link ="log"), data = perpoint.comb)
glm.res.4 <- glm(NumbSp ~ freqlog + distlog, 
                 family = "poisson"(link ="log"), data = perpoint.comb)

#compare glm results
AIC(glm.res.1, glm.res.2, glm.res.3, glm.res.4)
stepAIC(glm.res.1, direction = 'both') #use stepAIC function
## Start:  AIC=11538.32
## NumbSp ~ freqlog + distlog + class
## 
##           Df Deviance   AIC
## - distlog  1   9615.0 11537
## <none>         9614.6 11538
## - freqlog  1   9734.6 11656
## - class    6  12419.4 14331
## 
## Step:  AIC=11536.65
## NumbSp ~ freqlog + class
## 
##           Df Deviance   AIC
## <none>         9615.0 11537
## + distlog  1   9614.6 11538
## - freqlog  1   9735.8 11656
## - class    6  12455.9 14366
## 
## Call:  glm(formula = NumbSp ~ freqlog + class, family = poisson(link = "log"), 
##     data = perpoint.comb)
## 
## Coefficients:
##    (Intercept)         freqlog   classAmphibia  classArachnida       classAves  
##        2.72365        -0.07196         0.32547        -0.05185         1.06010  
##   classInsecta   classMammalia   classReptilia  
##        1.03821        -0.65465         0.41145  
## 
## Degrees of Freedom: 457 Total (i.e. Null);  450 Residual
## Null Deviance:       12510 
## Residual Deviance: 9615  AIC: 11540
#get summary for best model
summary(glm.res.1)
## 
## Call:
## glm(formula = NumbSp ~ freqlog + distlog + class, family = poisson(link = "log"), 
##     data = perpoint.comb)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     2.759305   0.082424  33.477  < 2e-16 ***
## freqlog        -0.071763   0.006395 -11.222  < 2e-16 ***
## distlog        -0.004326   0.007521  -0.575    0.565    
## classAmphibia   0.325910   0.059196   5.506 3.68e-08 ***
## classArachnida -0.051769   0.061466  -0.842    0.400    
## classAves       1.059180   0.052478  20.183  < 2e-16 ***
## classInsecta    1.037147   0.051540  20.123  < 2e-16 ***
## classMammalia  -0.654108   0.067277  -9.723  < 2e-16 ***
## classReptilia   0.412545   0.058763   7.021 2.21e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 12508.2  on 457  degrees of freedom
## Residual deviance:  9614.6  on 449  degrees of freedom
## AIC: 11538
## 
## Number of Fisher Scoring iterations: 6
#glm with taxa as a random variable
#pred.scale <- scale(perpoint.comb[6:7]) #scale predictors
#perpoint.comb.s <- cbind(perpoint.comb[2:3], pred.scale)

glm.res.5 <- glmer(NumbSp ~ freqlog + distlog + (1|class), 
                 family = "poisson"(link = "log"), data = perpoint.comb)
summary(glm.res.5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: NumbSp ~ freqlog + distlog + (1 | class)
##    Data: perpoint.comb
## 
##      AIC      BIC   logLik deviance df.resid 
##  11575.8  11592.3  -5783.9  11567.8      454 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -6.079 -2.965 -1.288  1.016 36.356 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  class  (Intercept) 0.3216   0.5671  
## Number of obs: 458, groups:  class, 7
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.064396   0.224596  13.644   <2e-16 ***
## freqlog     -0.071702   0.006393 -11.215   <2e-16 ***
## distlog     -0.004403   0.007520  -0.586    0.558    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) freqlg
## freqlog -0.082       
## distlog -0.276 -0.056
#get R^2
rsquared(glm.res.5, method = "trigamma")

Calculate the proportion of species occurrence records and conflict sites positioned in different land cover types

#here we calculate the number of sites found in different land cover types 
#this demonstrates the artifact of sampling, which was not considered in the paper
#land cover data of 1988 was from Swedish Space Corporation (SSC 1988)
#retrieved from https://data.humdata.org/dataset/29a3760f-3170-4555-b5d7-1fbd6cfb5a69?force_layout=desktop
#land cover of 2020 was from the Philippine National Mapping and Resource Information Authority
#data was retrieved from geoportal PH (https://www.geoportal.gov.ph/)
#we used land cover as a proxy for tree density and forest canopy data 
#the national land cover data is ground-thruthed
#this is only for simplistic illustration of sampling artifact
#the original land cover format is shapefile
#we converted it to raster file for fast data processing
#data conversion was done in QGIS using the plugin "rasterize" 
#raster pixel size is 0.0008 or approximately 100 meters
#all raster files have the uniform coordinate reference system, resolution and extent

#here is the 2020 land cover of Mindanao
knitr::include_graphics("lc_2020.png")

#read data
files <- list.files("land cover/", pattern = "tif", full.names=TRUE)
raster.files <- lapply(files, raster)

raster.files
## [[1]]
## class      : RasterLayer 
## dimensions : 7356, 9030, 66424680  (nrow, ncol, ncell)
## resolution : 8e-04, 8e-04  (x, y)
## extent     : 119.381, 126.605, 4.586858, 10.47166  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : 1988_lc_mindanao_rast100m.tif 
## names      : X1988_lc_mindanao_rast100m 
## 
## 
## [[2]]
## class      : RasterLayer 
## dimensions : 7356, 9030, 66424680  (nrow, ncol, ncell)
## resolution : 8e-04, 8e-04  (x, y)
## extent     : 119.381, 126.605, 4.586858, 10.47166  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : 2020_lc_mindanao_rast100m.tif 
## names      : X2020_lc_mindanao_rast100m
#resample raster files to have a uniform dimension/resolution
standard <- raster.files[[1]]
raster.resamp <- list(standard)
for (i in 2:length(raster.files)) {
  raster.resamp[[i]] <- resample(raster.files[[i]], standard,
                                 method='bilinear')}
#stack raster files
lc <- stack(raster.files)

#plot data
plot(lc$X1988_lc_mindanao_rast100m)
points(con.sub$longitude, con.sub$latitude)

con.coor <- cbind(con.sub$longitude, con.sub$latitude)

#extract data from land cover using conflict data coordinates
con.lc.data <- extract(lc, con.coor)
colnames(con.lc.data)[1] <- "LC_1988"
colnames(con.lc.data)[2] <- "LC_2020"
con.lc.data <- as.data.frame(na.omit(con.lc.data))
head(con.lc.data)
#land cover types code
lc.88.d <- read.csv("1988_lc_mindanao_code.csv")
lc.88.d
lc.20.d <- read.csv("2020_lc_mindanao_code.csv")
lc.20.d
#add land cover descriptions
con.lc.data.m1 <- con.lc.data %>% mutate(class_1988 = if_else(LC_1988 == 4, "Lake", 
    if_else(LC_1988 == 5,"Cultivated Area mixed with brushland/grassland",  
    if_else(LC_1988 == 7, "Built-up Area",
    if_else(LC_1988 == 13, "Quarry",
    if_else(LC_1988 == 15, "Unclassified",
    if_else(LC_1988 == 37, "Coconut plantations",
    if_else(LC_1988 == 42, "Open Canopy",
    if_else(LC_1988 == 154, "Crop land mixed with coconut plantation",
    if_else(LC_1988 == 487, "Arable land, crops mainly cereals and sugar",
    if_else(LC_1988 == 504, "Coral Reef",
    if_else(LC_1988 == 557, "Riverbeds", 
    if_else(LC_1988 == 667, "Open/Siltation pattern in lake",
    if_else(LC_1988 == 683, "Mangrove vegetation",
    if_else(LC_1988 == 706, "Fishponds derived from mangrove", 
    if_else(LC_1988 == 728, "Marshy area and swamp",
    if_else(LC_1988 == 746, "Closed Canopy",
    if_else(LC_1988 == 756, "Crop land mixed with other plantation",
    if_else(LC_1988 == 776, "Other plantations",
    if_else(LC_1988 == 840, "Grassland, grass covering > 70 percent",
    if_else(LC_1988 == 867, "Open Canopy", "No data")))))))))))))))))))))

con.lc.data.m2 <- con.lc.data %>% mutate(con.lc.data.m1, class_2020 = if_else(LC_2020 == 1, "Grassland", 
    if_else(LC_2020 == 2,"Annual Crop",  
    if_else(LC_2020 == 3, "Marshland/Swamp",
    if_else(LC_2020 == 4, "Perennial Crop",
    if_else(LC_2020 == 5, "Built-up",
    if_else(LC_2020 == 6, "Fishpond",
    if_else(LC_2020 == 7, "Inland Water",
    if_else(LC_2020 == 8, "Mangrove Forest",
    if_else(LC_2020 == 9, "Brush/Shrubs",
    if_else(LC_2020 == 10, "Closed Forest",
    if_else(LC_2020 == 11, "Open Forest", 
    if_else(LC_2020 == 12, "Open/Barren", "No data")))))))))))))

#count the number of conflict sites in each land cover type
#1988 data
con.lc.count.88 <- con.lc.data.m2 %>%
                      group_by(class_1988) %>%
                      summarise(n())
colnames(con.lc.count.88)[2] <- "count"

ggplot(data=con.lc.count.88, aes(x=class_1988, y=count)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

con.lc.count.88 <- as.data.frame(con.lc.count.88)
write.csv(con.lc.count.88, "con.lc.count.88.csv")

#2020 data
con.lc.count.20 <- con.lc.data.m2 %>%
                      group_by(class_2020) %>%
                      summarise(n())
colnames(con.lc.count.20)[2] <- "count"

ggplot(data=con.lc.count.20, aes(x=class_2020, y=count)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

con.lc.count.20 <- as.data.frame(con.lc.count.20)
write.csv(con.lc.count.20, "con.lc.count.20.csv")

#extract data from land cover using biodiversity data coordinates
bio.coor <- cbind(bio.sub$decimalLongitude, bio.sub$decimalLatitude)
bio.lc.data <- extract(lc, bio.coor)
colnames(bio.lc.data )[1] <- "LC_1988"
colnames(bio.lc.data )[2] <- "LC_2020"
bio.lc.data <- as.data.frame(na.omit(bio.lc.data ))
head(bio.lc.data)
#add land cover descriptions
bio.lc.data.m1 <- bio.lc.data %>% mutate(class_1988 = if_else(LC_1988 == 4, "Lake", 
    if_else(LC_1988 == 5,"Cultivated Area mixed with brushland/grassland",  
    if_else(LC_1988 == 7, "Built-up Area",
    if_else(LC_1988 == 13, "Quarry",
    if_else(LC_1988 == 15, "Unclassified",
    if_else(LC_1988 == 37, "Coconut plantations",
    if_else(LC_1988 == 42, "Open Canopy",
    if_else(LC_1988 == 154, "Crop land mixed with coconut plantation",
    if_else(LC_1988 == 487, "Arable land, crops mainly cereals and sugar",
    if_else(LC_1988 == 504, "Coral Reef",
    if_else(LC_1988 == 557, "Riverbeds", 
    if_else(LC_1988 == 667, "Open/Siltation pattern in lake",
    if_else(LC_1988 == 683, "Mangrove vegetation",
    if_else(LC_1988 == 706, "Fishponds derived from mangrove", 
    if_else(LC_1988 == 728, "Marshy area and swamp",
    if_else(LC_1988 == 746, "Closed Canopy",
    if_else(LC_1988 == 756, "Crop land mixed with other plantation",
    if_else(LC_1988 == 776, "Other plantations",
    if_else(LC_1988 == 840, "Grassland, grass covering > 70 percent",
    if_else(LC_1988 == 867, "Open Canopy", "No data")))))))))))))))))))))

bio.lc.data.m2<- bio.lc.data %>% mutate(bio.lc.data.m1, class_2020 = if_else(LC_2020 == 1, "Grassland", 
    if_else(LC_2020 == 2,"Annual Crop",  
    if_else(LC_2020 == 3, "Marshland/Swamp",
    if_else(LC_2020 == 4, "Perennial Crop",
    if_else(LC_2020 == 5, "Built-up",
    if_else(LC_2020 == 6, "Fishpond",
    if_else(LC_2020 == 7, "Inland Water",
    if_else(LC_2020 == 8, "Mangrove Forest",
    if_else(LC_2020 == 9, "Brush/Shrubs",
    if_else(LC_2020 == 10, "Closed Forest",
    if_else(LC_2020 == 11, "Open Forest", 
    if_else(LC_2020 == 12, "Open/Barren", "No data")))))))))))))

#count the number of conflict sites in each land cover type
#1988 data
bio.lc.count.88 <- bio.lc.data.m2 %>%
                      group_by(class_1988) %>%
                      summarise(n())
colnames(bio.lc.count.88)[2] <- "count"

ggplot(data=bio.lc.count.88, aes(x=class_1988, y=count)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

bio.lc.count.88 <- as.data.frame(bio.lc.count.88)
write.csv(bio.lc.count.88, "bio.lc.count.88.csv")

#2020 data
bio.lc.count.20 <- bio.lc.data.m2 %>%
                      group_by(class_2020) %>%
                      summarise(n())
colnames(bio.lc.count.20)[2] <- "count"

bio.lc.count.20 <- as.data.frame(bio.lc.count.20)

ggplot(data=bio.lc.count.20, aes(x=class_2020, y=count)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

bio.lc.count.20 <- as.data.frame(bio.lc.count.20)
write.csv(bio.lc.count.20, "bio.lc.count.20.csv")